package core.rdd.分区;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;

import java.util.Arrays;
import java.util.List;

/**
 * The default partitioning splits the input data into approximately equal-sized partitions.
 */
public class Spark01_PART {
    public static void main(String[] args) {

        // Configure the Spark application; local[*] runs locally using all available cores
        SparkConf conf = new SparkConf()
                .setAppName("Spark01_PART")  // application name
                .setMaster("local[*]");      // replace with your cluster's master URL when deploying
        // JavaSparkContext is the main entry point for interacting with the cluster
        JavaSparkContext jsc = new JavaSparkContext(conf);

        try {
            Tuple2<String, Integer> tuple1 = new Tuple2<>("a", 123);
            Tuple2<String, Integer> tuple2 = new Tuple2<>("b", 456);
            Tuple2<String, Integer> tuple3 = new Tuple2<>("c", 5);
            Tuple2<String, Integer> tuple4 = new Tuple2<>("d", 6);
            Tuple2<String, Integer> tuple5 = new Tuple2<>("e", 2);
            Tuple2<String, Integer> tuple6 = new Tuple2<>("f", 4);


            List<Tuple2<String, Integer>> tuple2s = Arrays.asList(
                    tuple1, tuple2, tuple3, tuple5, tuple4, tuple6
            );


            // Request 4 partitions; parallelize() slices the list approximately evenly across them
            JavaRDD<Tuple2<String, Integer>> rdd = jsc.parallelize(tuple2s, 4);
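            // Sketch of how to inspect the default split: glom() gathers each partition
            // into a List, so collecting it shows how the six elements were distributed.
            // With 6 elements in 4 slices the positional split gives sizes 1, 2, 1, 2 here.
            List<List<Tuple2<String, Integer>>> partitions = rdd.glom().collect();
            for (int i = 0; i < partitions.size(); i++) {
                System.out.println("partition " + i + " -> " + partitions.get(i));
            }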


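            // Sketch (assumption): the otherwise unused JavaPairRDD/PairFunction/Function2 imports
            // suggest a key-value step was intended. A shuffle such as reduceByKey() uses a
            // HashPartitioner by default, placing each key in roughly hashCode(key) % numPartitions.
            JavaPairRDD<String, Integer> pairRdd = rdd.mapToPair(
                    (PairFunction<Tuple2<String, Integer>, String, Integer>) t -> t);
            JavaPairRDD<String, Integer> reduced = pairRdd.reduceByKey(
                    (Function2<Integer, Integer, Integer>) Integer::sum, 2); // shuffle into 2 partitions
            System.out.println("reduceByKey output has " + reduced.getNumPartitions()
                    + " partitions: " + reduced.collect());
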
        } finally {
            jsc.close();
        }
    }
}
